* schneider-troeger-create-Bs

*
*
* This file create's the 1000 simulated beta coefficients based off our 3 bootstrapping approaches (delta method is done in the next file)
* All created datasets in this file will be used in the next .do file, schneider-troeger-makeplots.do
* ----------------------------------------------------------------------------
cd "~/Dropbox/Benton-Jordan-Philips/final-JOP-replication"
clear
clear matrix
clear mata
set maxvar 80000
import excel "data/schneider-troeger/st-data.xlsx", sheet("Sheet1")  firstrow clear
gen t = _n
tsset t
keep diff_dji golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum diff_cac diff_ftse    y91 y92 y93 y94 y95 y96 y97 y98 y99 y00 t

*they include a LDV:
gen ldiff_dji = l.diff_dji
compress
set seed 03858420


/* All approaches below save the parameter estimates in the following datasets:
	--schneider-troeger-betas-parametric.dta
	--schneider-troeger-betas-me.dta
	--schneider-troeger-betas-resid.dta
	
	Here's the coding of each beta:
	b1 =  diff_dji:golf_sever~y
	b2 =  diff_dji:golf_possum
	b3 =  diff_dji:golf_negsum
	b4 =  diff_dji:pal_severity
	b5 =  diff_dji:pal_possum
	b6 =  diff_dji:pal_negsum
	b7 =  diff_dji:yug_severity
	b8 =  diff_dji:yug_possum
	b9 =  diff_dji:yug_negsum
	b10 = diff_dji:l.diff_dji
	b11 = diff_dji:diff_cac
	b12 = diff_dji:diff_ftse
	b13 = diff_dji:_cons 
	b14 = HET: golf_sever~y
	b15 = HET: pal_sever~y
	b16 = HET: yug_severity
	b17 = HET: y91
	b18 = HET: y92
	b19 = HET: y93
	b20 = HET: y94
	b21 = HET: y95
	b22 = HET: y96
	b23 = HET: y97
	b24 = HET: y98
	b25 = HET: y99
	b26 = HET: y00
	b27 = HET: _cons
	b28 = HET: L.arch
	b29 = HET: L.tarch
	b30 = HET: L.garch
*/





* ----------------------------------------------------------------------------
*	APPROACH 1: PARAMETRIC BOOTSTRAP
* ----------------------------------------------------------------------------
arch diff_dji golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum ldiff_dji diff_cac diff_ftse, arch(1) tarch(1) garch(1) het(golf_severity pal_severity yug_severity y91 y92 y93 y94 y95 y96 y97 y98 y99 y00) arch0(xb)
mat B = e(b)
mat VCV = e(V)
loc names: colnames VCV
di "`names'"
mat list B
clear
corr2data b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15 b16 b17 b18 b19 b20 b21 b22 b23 b24 b25 b26 b27 b28 b29 b30, n(1000) means(B) cov(VCV)
gen bootcount = _n
compress
save "data/schneider-troeger/schneider-troeger-betas-parametric.dta", replace
* ----------------------------------------------------------------------------



* ----------------------------------------------------------------------------
*	APPROACH 2: MAXIMUM ENTROPY BOOTSTRAP
* ----------------------------------------------------------------------------
* NOTE: MAKE SURE TO RUN schneider-troeger-me.R first!
clear
clear matrix
clear mata
set maxvar 80000
import excel "data/schneider-troeger/st-data.xlsx", sheet("Sheet1")  firstrow clear
gen t = _n
tsset t
keep diff_dji golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum diff_cac diff_ftse    y91 y92 y93 y94 y95 y96 y97 y98 y99 y00 t
gen ldiff_dji = l.diff_dji
compress
set seed 03858420
gen time = _n // to join
preserve // need to csv -> dta
import delimited "data/schneider-troeger/schneider-troeger-me.csv", encoding(ISO-8859-2) clear 
drop yfirstobslastobs v1 // Note: don't need yfirstobslastobs (that's just diff_dji)
compress
save "data/schneider-troeger/schneider-troeger-me.dta", replace
restore
joinby time using "data/schneider-troeger/schneider-troeger-me.dta"
tsset

* plot synthetic y's for SI:
twoway line meboot_y_1 t, color(gray) lpattern(solid) || ///
	line meboot_y_2 t, color(gray) lpattern(solid) || ///
	line meboot_y_3 t, color(gray) lpattern(solid) || ///
	line meboot_y_4 t, color(gray) lpattern(solid) || ///
	line meboot_y_5 t, color(gray) lpattern(solid) || ///
	line meboot_y_6 t, color(gray) lpattern(solid) || ///
	line meboot_y_7 t, color(gray) lpattern(solid) || ///
	line meboot_y_8 t, color(gray) lpattern(solid) || ///
	line meboot_y_9 t, color(gray) lpattern(solid) || ///
	line meboot_y_10 t, color(gray) lpattern(solid) || ///
	line meboot_y_11 t, color(gray) lpattern(solid) || ///
	line meboot_y_12 t, color(gray) lpattern(solid) || ///
	line meboot_y_13 t, color(gray) lpattern(solid) || ///
	line meboot_y_14 t, color(gray) lpattern(solid) || ///
	line meboot_y_15 t, color(gray) lpattern(solid) || ///
	line diff_dji t, color(blue) lpattern(solid) lwidth(medthick) legend(off) lcolor(%30)
	*graph export "Bootstrap-Simulation/fall-2023-RR-submission/figures/meboot-syntheticplot.pdf", as(pdf) replace

forv i = 1/30 { // empty vectors to store B's. k = 30
	gen b`i' = .
}
* loop through the DVs:
timer on 1
loc place = 1
forv i = 1/2500 { // all me's
	noi di in y "On successful simulation #: `place'"
	if `place' == 1001 {
		* STOP; 1000 beta's created
	}
	else {
		qui replace ldiff_dji = l.meboot_y_`i' // fix the LDV to be the new l.DV
		cap arch meboot_y_`i' golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum ldiff_dji diff_cac diff_ftse, arch(1) tarch(1) garch(1) het(golf_severity pal_severity yug_severity y91 y92 y93 y94 y95 y96 y97 y98 y99 y00) arch0(xb)
		if "`e(converged)'" == "1" {
			forv z = 1/30 {
				mat B = e(b)
				qui replace b`z' = B[1,`z'] in `place' // replace empty beta w/ estimates
			}
			loc place = `place' + 1 // move down 1 beta row
		}
		else {
		}
	}
}
timer off 1
timer list 1
keep b1-b30
gen bootcount = _n
compress
save "data/schneider-troeger/schneider-troeger-betas-me.dta", replace
* ----------------------------------------------------------------------------


* ----------------------------------------------------------------------------
*	APPROACH 3: RESIDUAL BOOTSTRAP, using Enders' suggestion of random draws for starting values
* ----------------------------------------------------------------------------
clear
clear matrix
clear mata
set maxvar 80000
import excel "data/schneider-troeger/st-data.xlsx", sheet("Sheet1")  firstrow clear
* everythign allready sorted
gen t = _n
tsset t
keep diff_dji golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum diff_cac diff_ftse    y91 y92 y93 y94 y95 y96 y97 y98 y99 y00 t
gen ldiff_dji = l.diff_dji
compress
set seed 03858420
do "scripts-programs/bootr2.ado" // same as bootr but preserves any first-obs missing values in master dataset
do "scripts-programs/bootrone.ado" // draws a single boot of y, e for starting values.
* Usable T = [633-3477]:
keep if t >= 633 & t <= 3477
drop t
gen t = _n
tsset t

* estimate T-GARCH:
arch diff_dji golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum ldiff_dji diff_cac diff_ftse, arch(1) tarch(1) garch(1) het(golf_severity pal_severity yug_severity y91 y92 y93 y94 y95 y96 y97 y98 y99 y00) arch0(xb)
predict nu, resid // nu_t = \sigma_t\epsilon_t
predict sigma2, variance // sigma^2_t
gen epsilon = nu/sqrt(sigma2) // rescale, nu_t/\sigma_t = \epsilon_t
su epsilon // mean-center
replace epsilon = epsilon-r(mean)
gen _sigma2 = . // for storing sigma2. This gets replaced every bootstrap
gen neg_e = . // where $D_{t-1}=1$ if $\nu_{t-1}>0$ (really epsilon > 0), and 0 otherwise.

* loop across bootstraps
forv b = 1/2500 { //2500
	noi di in y "On successful simulation #: `b'"

	* Create initial values of y and sigma2 at t=0. Previously we were trying a burn-in process but that meant we lost an additional observation
	* draw residuals:
	bootrone, resid(epsilon) // this will be the t=0 error which we need for sigma2
	loc residvalue = r(residvalue) 
	* D term
	qui replace neg_e = 1 if `residvalue' > 0 
	qui replace neg_e = 0 if `residvalue' <= 0
	* resid for t=1 error for y
	bootrone, resid(epsilon) 
	loc residvaluey = r(residvalue)
	
	* independently from resid, draw bs at same time for y and sigma2
	bootrone, yvalue(diff_dji) sigma2value(sigma2)
	
	* init sigma2 at t=1
	qui replace _sigma2 = _b[ARCH:L.arch]*`residvalue'^2*r(sigma2init) + ///
		_b[ARCH:L.tarch]*`residvalue'^2*r(sigma2init)*neg_e + ///
		_b[ARCH:L.garch]*r(sigma2init) + exp(_b[HET:_cons] + ///
		_b[HET:golf_severity]*golf_severity + ///
		_b[HET:pal_severity]*pal_severity + ///
		_b[HET:yug_severity]*yug_severity + _b[HET:y91]*y91 + ///
		_b[HET:y92]*y92 + _b[HET:y93]*y93 + _b[HET:y94]*y94 + ///
		_b[HET:y95]*y95 + _b[HET:y96]*y96 + _b[HET:y97]*y97 + ///
		_b[HET:y98]*y98 + _b[HET:y99]*y99 + _b[HET:y00]*y00) in 1
		
	* and sigma2 at t=2, which requires `residvaluey' (resid observed at t=1)
	qui replace neg_e = 1 if `residvaluey' > 0  // update D term
	qui replace neg_e = 0 if `residvaluey' <= 0
	qui replace _sigma2 = _b[ARCH:L.arch]*`residvaluey'^2*l._sigma2 + ///
		_b[ARCH:L.tarch]*`residvaluey'^2*l._sigma2*l.neg_e + ///
		_b[ARCH:L.garch]*l._sigma2 + exp(_b[HET:_cons] + ///
		_b[HET:golf_severity]*golf_severity + ///
		_b[HET:pal_severity]*pal_severity + ///
		_b[HET:yug_severity]*yug_severity + _b[HET:y91]*y91 + ///
		_b[HET:y92]*y92 + _b[HET:y93]*y93 + _b[HET:y94]*y94 + ///
		_b[HET:y95]*y95 + _b[HET:y96]*y96 + _b[HET:y97]*y97 + ///
		_b[HET:y98]*y98 + _b[HET:y99]*y99 + _b[HET:y00]*y00) in 2
	* init y at t=1 (use guess for l.y, which is bs draw r(yinit))
	qui gen y_b`b' = _b[diff_dji:_cons] + ///
		_b[diff_dji:golf_severity]*golf_severity + ///
		_b[diff_dji:golf_possum]*golf_possum + ///
		_b[diff_dji:golf_negsum]*golf_negsum + ///
		_b[diff_dji:pal_severity]*pal_severity + ///
		_b[diff_dji:pal_possum]*pal_possum + ///
		_b[diff_dji:pal_negsum]*pal_negsum + ///
		_b[diff_dji:yug_severity]*yug_severity + ///
		_b[diff_dji:yug_possum]*yug_possum + ///
		_b[diff_dji:yug_negsum]*yug_negsum + ///
		_b[diff_dji:diff_cac]*diff_cac + _b[diff_dji:diff_ftse]*diff_ftse + ///
		_b[ldiff_dji]*r(yinit) + `residvaluey'*sqrt(_sigma2) in 1
	
	* draw bs resids for t=2/T
	cap drop boot_e
	bootr2, old(epsilon) newname(boot_e) minT(1)
	qui replace neg_e = 1 if `residvalue' > 0 // update D term
	qui replace neg_e = 0 if `residvalue' <= 0
	
	* recursively gen _sigma2 and y for 2/T
	qui replace _sigma2 = _b[ARCH:L.arch]*l.boot_e^2*l._sigma2 + ///
		_b[ARCH:L.tarch]*l.boot_e^2*l._sigma2*l.neg_e + ///
		_b[ARCH:L.garch]*l._sigma2 + exp(_b[HET:_cons] + ///
		_b[HET:golf_severity]*golf_severity + ///
		_b[HET:pal_severity]*pal_severity + ///
		_b[HET:yug_severity]*yug_severity + _b[HET:y91]*y91 + ///
		_b[HET:y92]*y92 + _b[HET:y93]*y93 + _b[HET:y94]*y94 + ///
		_b[HET:y95]*y95 + _b[HET:y96]*y96 + _b[HET:y97]*y97 + ///
		_b[HET:y98]*y98 + _b[HET:y99]*y99 + _b[HET:y00]*y00) in 3/L
		
	qui replace y_b`b' = _b[diff_dji:_cons] + ///
		_b[diff_dji:golf_severity]*golf_severity + ///
		_b[diff_dji:golf_possum]*golf_possum + ///
		_b[diff_dji:golf_negsum]*golf_negsum + ///
		_b[diff_dji:pal_severity]*pal_severity + ///
		_b[diff_dji:pal_possum]*pal_possum + ///
		_b[diff_dji:pal_negsum]*pal_negsum + ///
		_b[diff_dji:yug_severity]*yug_severity + ///
		_b[diff_dji:yug_possum]*yug_possum + ///
		_b[diff_dji:yug_negsum]*yug_negsum + ///
		_b[diff_dji:diff_cac]*diff_cac + _b[diff_dji:diff_ftse]*diff_ftse + ///
		_b[ldiff_dji]*l.y_b`b' + boot_e*sqrt(_sigma2) in 2/L
}


* plot synthetic y's for SI:
twoway line y_b1 t, color(gray) lpattern(solid) || ///
	line y_b2 t, color(gray) lpattern(solid) || ///
	line y_b3 t, color(gray) lpattern(solid) || ///
	line y_b4 t, color(gray) lpattern(solid) || ///
	line y_b5 t, color(gray) lpattern(solid) || ///
	line y_b6 t, color(gray) lpattern(solid) || ///
	line y_b7 t, color(gray) lpattern(solid) || ///
	line y_b8 t, color(gray) lpattern(solid) || ///
	line y_b9 t, color(gray) lpattern(solid) || ///
	line y_b10 t, color(gray) lpattern(solid) || ///
	line y_b11 t, color(gray) lpattern(solid) || ///
	line y_b12 t, color(gray) lpattern(solid) || ///
	line y_b13 t, color(gray) lpattern(solid) || ///
	line y_b14 t, color(gray) lpattern(solid) || ///
	line y_b15 t, color(gray) lpattern(solid) || ///
	line diff_dji t, color(blue) lpattern(solid) lwidth(medthick) legend(off) lcolor(%30)
	*graph export "Bootstrap-Simulation/fall-2023-RR-submission/figures/schneider-troeger-rboot-syntheticplot.pdf", as(pdf) replace
 
* create containers to hold b's:
forv i = 1/30 { // empty vectors to store B's. k = 30
	gen b`i' = .
}
* here's the loop
timer on 1
loc place = 1
forv i = 1/2500 {
	noi di in y "On simulation #: `place'"
	if `place' >= 1001 {
		* STOP; 1000 betas created
	}
	else {
		cap arch y_b`i' golf_severity golf_possum golf_negsum pal_severity pal_possum pal_negsum yug_severity yug_possum yug_negsum l.y_b`i' diff_cac diff_ftse, arch(1) tarch(1) garch(1) het(golf_severity pal_severity yug_severity y91 y92 y93 y94 y95 y96 y97 y98 y99 y00) arch0(xb)
		if "`e(converged)'" == "1" {
			* post the B's to a vector
			forv z = 1/30 {
				mat B = e(b)
				qui replace b`z' = B[1,`z'] in `place' // replace empty beta w/ estimates
			}
			loc place = `place' + 1 // move down 1 beta row
		}
		else {
			* do nothing, re-estimate using a new Y
		}
	}
}
timer off 1
timer list 1
keep b1-b30
gen bootcount = _n
compress
save "data/schneider-troeger/schneider-troeger-betas-resid.dta", replace

* ----------------------------------------------------------------------------
